package spring.webmagic;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * Small WebMagic {@link PageProcessor} demo: for every downloaded page it prints a
 * separator line followed by the result of a single CSS query, and its {@code main}
 * method starts a spider on one fixed URL.
 */
public class MyPageProcessor implements PageProcessor {

    /** Separator line written to stdout before each extracted value. */
    private static final String SEPARATOR =
            "========================================================";

    /** CSS selector evaluated against each downloaded page. */
    private static final String LINK_SELECTOR =
            "#J_cate > ul > li:nth-child(3) > a:nth-child(3)";

    /** Second argument passed to {@code css(...)} together with the selector. */
    private static final String LINK_ATTRIBUTE = "text";

    /** URL the spider is seeded with. */
    private static final String START_URL = "https://www.jd.com/";

    // Crawler configuration hook; Site.me() is used as-is, no settings are changed here.
    private final Site site = Site.me();

    /**
     * Prints the separator, then the value selected from the page.
     *
     * <p>The output goes directly to stdout; nothing is stored on the page here.
     *
     * @param page the downloaded page supplied by the spider
     */
    public void process(Page page) {
        // Separator goes out first, before the selector is evaluated.
        System.out.println(SEPARATOR);
        String selected = page.getHtml().css(LINK_SELECTOR, LINK_ATTRIBUTE).get();
        System.out.println(selected);
    }

    /**
     * Returns the crawler configuration held by this processor.
     *
     * @return the {@link Site} instance created when this processor was constructed
     */
    public Site getSite() {
        return site;
    }

    /**
     * Entry point: builds a spider around this processor, seeds it with the start URL
     * and runs it.
     *
     * <p>No downloader is configured explicitly; per the original author's note,
     * WebMagic's default downloader is HttpClient-based.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Supply our own PageProcessor implementation.
        Spider spider = Spider.create(new MyPageProcessor());
        // Register the initial URL to download.
        spider.addUrl(START_URL);
        spider.run();
    }
}